{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example code to apply the modular splicing model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example test variants come from ClinVar BRCA1 variants"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Splicing delta PSI prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from mmsplice.vcf_dataloader import SplicingVCFDataloader\n",
    "from mmsplice import MMSplice, predict_all_table, predict_save\n",
    "from mmsplice.utils import max_varEff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "gtf = '../tests/data/test.gtf'\n",
    "vcf = '../tests/data/test.vcf.gz'\n",
    "fasta = '../tests/data/hg19.nochr.chr17.fa'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "dl = SplicingVCFDataloader(gtf, fasta, vcf, encode=False, split_seq=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'inputs': {'seq': {'acceptor_intron': 'CCTAAGAACTCATACAACCAGGACCCTGGAGTCGATTGATTAGAGCCTAGTCCAGGAGAATGAATTGACACTAATCTCTGCTTGTGTTCTCTGT',\n",
       "   'acceptor': 'TCCAGGAGAATGAATTGACACTAATCTCTGCTTGTGTTCTCTGTCTCCAGCAA',\n",
       "   'exon': 'CAATTGGGCAGATGTGTGAGGCACCTGTGGTGACCCGAGAGTGGGTGTTGGACAGTGTAGCACTCTACCAGTGCCAGGAGCTGGACACCTACCTGATACCCCAGATCCCCCACAGCCACTACTGACTGCAGCCAGCCACAGGTACAGAGCCACAGGACCCCAAGAATGAGCTTACAAAGTGGCCTTTCCAGGCCCTGGGAGCTCCTCTCACTCTTCAGTCCTTCTACTGTCCTGGCTACTAAATATTTTATGTACATCAGCCTGAAAAGGACTTCTGGCTATGCAAGGGTCCCTTAAAGATTTTCTGCTTGAAGTCTCCCTTGGAAATCTGCCATGAGCACAAAATTATGGTAATTTTTCACCTGAGAAGATTTTAAAACCATTTAAACGCCACCAATTGAGCAAGATGCTGATTCATTATTTATCAGCCCTATTCTTTCTATTCAGGCTGTTGTTGGCTTAGGGCTGGAAGCACAGAGTGGCTTGGCCTCAAGAGAATAGCTGGTTTCCCTAAGTTTACTTCTCTAAAACCCTGTGTTCACAAAGGCAGAGAGTCAGACCCTTCAATGGAAGGAGAGTGCTTGGGATCGATTATGTGACTTAAAGTCAGAATAGTCCTTGGGCAGTTCTCAAATGTTGGAGTGGAACATTGGGGAGGAAATTCTGAGGCAGGTATTAGAAATGAAAAGGAAACTTGAAACCTGGGCATGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGCAGATCACTGGAGGTCAGGAGTTCGAAACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAGAAATTAGCCGGTCATGGTGGTGGACACCTGTAATCCCAGCTACTCAGGTGGCTAAGGCAGGAGAATCACTTCAGCCCGGGAGGTGGAGGTTGCAGTGAGCCAAGATCATACCACGGCACTCCAGCCTGGGTGACAGTGAGACTGTGGCTCAAAAAAAAAAAAAAAAAAAGGAAAATGAAACTAGAAGAGATTTCTAAAAGTCTGAGATATATTTGCTAGATTTCTAAAGAATGTGTTCTAAAACAGCAGAAGATTTTCAAGAACCGGTTTCCAAAGACAGTCTTCTAATTCCTCATTAGTAATAAGTAAAATGTTTATTGTTGTAGCTCTGGTATATAATCCATTCCTCTTAAAATATAAGACCTCTGGCATGAATATTTCATATCTATAAAATGACAGATCCCACCAGGAAGGAAGCTGTTGCTTTCTTTGAGGTGATTTTTTTCCTTTGCTCCCTGTTGCTGAAACCATACAGCTTCATAAATAATTTTGCTTGCTGAAGGAAGAAAAAGTGTTTTTCATAAACCCATTATCCAGGACTGTTTATAGCTGTTGGAAGGACTAGGTCTTCCCTAGCCCCCCCAGTGTGCAAGGGCAGTGAAGACTTGATTGTACAAAATACGTTTTGTAAATGTTGTGCTGTTAACACTGCAAATAAACTTGGTAGCAAACACTTCCA',\n",
       "   'donor': 'TTCCANNNNNNNNNNNNN',\n",
       "   'donor_intron': 'NNNNNNNN'},\n",
       "  'mut_seq': {'acceptor_intron': 'CCTAAGAACTCATACAACCAGGACCCTGGAGTCGATTGATTAGAGCCTAGTCCAGGAGAATGAATTGACACTAATCTCTGCTTGTGTTCTCTGT',\n",
       "   'acceptor': 'TCCAGGAGAATGAATTGACACTAATCTCTGCTTGTGTTCTCTGTCTCCAGCAA',\n",
       "   'exon': 'CAATTGTGTGAGGCACCTGTGGTGACCCGAGAGTGGGTGTTGGACAGTGTAGCACTCTACCAGTGCCAGGAGCTGGACACCTACCTGATACCCCAGATCCCCCACAGCCACTACTGACTGCAGCCAGCCACAGGTACAGAGCCACAGGACCCCAAGAATGAGCTTACAAAGTGGCCTTTCCAGGCCCTGGGAGCTCCTCTCACTCTTCAGTCCTTCTACTGTCCTGGCTACTAAATATTTTATGTACATCAGCCTGAAAAGGACTTCTGGCTATGCAAGGGTCCCTTAAAGATTTTCTGCTTGAAGTCTCCCTTGGAAATCTGCCATGAGCACAAAATTATGGTAATTTTTCACCTGAGAAGATTTTAAAACCATTTAAACGCCACCAATTGAGCAAGATGCTGATTCATTATTTATCAGCCCTATTCTTTCTATTCAGGCTGTTGTTGGCTTAGGGCTGGAAGCACAGAGTGGCTTGGCCTCAAGAGAATAGCTGGTTTCCCTAAGTTTACTTCTCTAAAACCCTGTGTTCACAAAGGCAGAGAGTCAGACCCTTCAATGGAAGGAGAGTGCTTGGGATCGATTATGTGACTTAAAGTCAGAATAGTCCTTGGGCAGTTCTCAAATGTTGGAGTGGAACATTGGGGAGGAAATTCTGAGGCAGGTATTAGAAATGAAAAGGAAACTTGAAACCTGGGCATGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGCAGATCACTGGAGGTCAGGAGTTCGAAACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAGAAATTAGCCGGTCATGGTGGTGGACACCTGTAATCCCAGCTACTCAGGTGGCTAAGGCAGGAGAATCACTTCAGCCCGGGAGGTGGAGGTTGCAGTGAGCCAAGATCATACCACGGCACTCCAGCCTGGGTGACAGTGAGACTGTGGCTCAAAAAAAAAAAAAAAAAAAGGAAAATGAAACTAGAAGAGATTTCTAAAAGTCTGAGATATATTTGCTAGATTTCTAAAGAATGTGTTCTAAAACAGCAGAAGATTTTCAAGAACCGGTTTCCAAAGACAGTCTTCTAATTCCTCATTAGTAATAAGTAAAATGTTTATTGTTGTAGCTCTGGTATATAATCCATTCCTCTTAAAATATAAGACCTCTGGCATGAATATTTCATATCTATAAAATGACAGATCCCACCAGGAAGGAAGCTGTTGCTTTCTTTGAGGTGATTTTTTTCCTTTGCTCCCTGTTGCTGAAACCATACAGCTTCATAAATAATTTTGCTTGCTGAAGGAAGAAAAAGTGTTTTTCATAAACCCATTATCCAGGACTGTTTATAGCTGTTGGAAGGACTAGGTCTTCCCTAGCCCCCCCAGTGTGCAAGGGCAGTGAAGACTTGATTGTACAAAATACGTTTTGTAAATGTTGTGCTGTTAACACTGCAAATAAACTTGGTAGCAAACACTTCCA',\n",
       "   'donor': 'TTCCANNNNNNNNNNNNN',\n",
       "   'donor_intron': 'NNNNNNNN'}},\n",
       " 'metadata': {'variant': {'CHROM': '17',\n",
       "   'POS': 41197805,\n",
       "   'REF': 'ACATCTGCC',\n",
       "   'ALT': ['A'],\n",
       "   'STR': \"17:41197805:ACATCTGCC:['A']\"},\n",
       "  'exon': {'chrom': '17',\n",
       "   'start': 41196311,\n",
       "   'end': 41197819,\n",
       "   'strand': '-',\n",
       "   'left_overhang': 100,\n",
       "   'right_overhang': 0,\n",
       "   'annotation': '17:41196311-41197819:-',\n",
       "   'exon_id': 'ENSE00001814242',\n",
       "   'gene_id': 'ENSG00000012048',\n",
       "   'gene_name': 'BRCA1',\n",
       "   'transcript_id': 'ENST00000357654'}}}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next(dl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-08-12 20:27:11,476 [WARNING] From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "/home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/keras/engine/saving.py:292: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.\n",
      "  warnings.warn('No training configuration found in save file: '\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-08-12 20:27:11,780 [WARNING] From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.cast instead.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2019-08-12 20:27:11,991 [WARNING] From /home/muhammedhasan/Projects/MMSplice/venv/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.cast instead.\n"
     ]
    }
   ],
   "source": [
    "model = MMSplice()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4it [00:11,  2.57s/it]\n"
     ]
    }
   ],
   "source": [
    "dl = SplicingVCFDataloader(gtf, fasta, vcf)\n",
    "\n",
    "predictions = predict_all_table(model, dl, pathogenicity=True, splicing_efficiency=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictionsMax = max_varEff(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>delta_logit_psi</th>\n",
       "      <th>exons</th>\n",
       "      <th>exon_id</th>\n",
       "      <th>gene_id</th>\n",
       "      <th>gene_name</th>\n",
       "      <th>transcript_id</th>\n",
       "      <th>ref_acceptorIntron</th>\n",
       "      <th>ref_acceptor</th>\n",
       "      <th>ref_exon</th>\n",
       "      <th>ref_donor</th>\n",
       "      <th>ref_donorIntron</th>\n",
       "      <th>alt_acceptorIntron</th>\n",
       "      <th>alt_acceptor</th>\n",
       "      <th>alt_exon</th>\n",
       "      <th>alt_donor</th>\n",
       "      <th>alt_donorIntron</th>\n",
       "      <th>pathogenicity</th>\n",
       "      <th>efficiency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>939</th>\n",
       "      <td>17:41251568:GTGAGCCACCATGCCTGGCCTCTTTTGCTCCCTT...</td>\n",
       "      <td>-13.142139</td>\n",
       "      <td>17:41251791-41251894:-</td>\n",
       "      <td>ENSE00003477699</td>\n",
       "      <td>ENSG00000012048</td>\n",
       "      <td>BRCA1</td>\n",
       "      <td>ENST00000461221</td>\n",
       "      <td>-2.928739</td>\n",
       "      <td>1.003448</td>\n",
       "      <td>-3.000968</td>\n",
       "      <td>2.998045</td>\n",
       "      <td>0.683019</td>\n",
       "      <td>-3.894215</td>\n",
       "      <td>-5.617308</td>\n",
       "      <td>-1.16161</td>\n",
       "      <td>-4.262197</td>\n",
       "      <td>0.008081</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-7.652236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>17:41219540:CGATTACAGGCATGCGCCACCGTGCCTCGCCTCA...</td>\n",
       "      <td>-12.980691</td>\n",
       "      <td>17:41219624-41219712:-</td>\n",
       "      <td>ENSE00003492626</td>\n",
       "      <td>ENSG00000012048</td>\n",
       "      <td>BRCA1</td>\n",
       "      <td>ENST00000357654</td>\n",
       "      <td>-2.256955</td>\n",
       "      <td>4.854000</td>\n",
       "      <td>-4.449435</td>\n",
       "      <td>2.749682</td>\n",
       "      <td>0.218514</td>\n",
       "      <td>-2.898835</td>\n",
       "      <td>-6.655025</td>\n",
       "      <td>-1.16161</td>\n",
       "      <td>-3.431215</td>\n",
       "      <td>-0.198021</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-6.342944</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>17:41232465:TGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAG...</td>\n",
       "      <td>-12.446247</td>\n",
       "      <td>17:41234420-41234592:-</td>\n",
       "      <td>ENSE00003527960</td>\n",
       "      <td>ENSG00000012048</td>\n",
       "      <td>BRCA1</td>\n",
       "      <td>ENST00000357654</td>\n",
       "      <td>-2.337047</td>\n",
       "      <td>0.198098</td>\n",
       "      <td>-3.064758</td>\n",
       "      <td>3.751723</td>\n",
       "      <td>0.509226</td>\n",
       "      <td>-3.498240</td>\n",
       "      <td>-5.302096</td>\n",
       "      <td>-1.16161</td>\n",
       "      <td>-2.475021</td>\n",
       "      <td>-0.310342</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-5.989502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>903</th>\n",
       "      <td>17:41248754:ATTTTCATCAGTCACAATTACTGCTGTGTCAATA...</td>\n",
       "      <td>-11.353837</td>\n",
       "      <td>17:41249260-41249306:-</td>\n",
       "      <td>ENSE00003587679</td>\n",
       "      <td>ENSG00000012048</td>\n",
       "      <td>BRCA1</td>\n",
       "      <td>ENST00000357654</td>\n",
       "      <td>-2.896458</td>\n",
       "      <td>0.525802</td>\n",
       "      <td>-3.411850</td>\n",
       "      <td>4.421351</td>\n",
       "      <td>0.797147</td>\n",
       "      <td>-3.806159</td>\n",
       "      <td>-6.966350</td>\n",
       "      <td>-1.16161</td>\n",
       "      <td>-3.872047</td>\n",
       "      <td>0.715352</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-8.457780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>642</th>\n",
       "      <td>17:41242891:GTGGGATACATACTACTGAATGCAAAGGACACCA...</td>\n",
       "      <td>-11.322243</td>\n",
       "      <td>17:41242960-41243049:-</td>\n",
       "      <td>ENSE00003547126</td>\n",
       "      <td>ENSG00000012048</td>\n",
       "      <td>BRCA1</td>\n",
       "      <td>ENST00000357654</td>\n",
       "      <td>-3.370246</td>\n",
       "      <td>4.042363</td>\n",
       "      <td>-3.705677</td>\n",
       "      <td>3.972384</td>\n",
       "      <td>0.524251</td>\n",
       "      <td>-2.947957</td>\n",
       "      <td>-5.811146</td>\n",
       "      <td>-1.16161</td>\n",
       "      <td>-4.428016</td>\n",
       "      <td>0.385194</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-9.122525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    ID  delta_logit_psi  \\\n",
       "939  17:41251568:GTGAGCCACCATGCCTGGCCTCTTTTGCTCCCTT...       -13.142139   \n",
       "396  17:41219540:CGATTACAGGCATGCGCCACCGTGCCTCGCCTCA...       -12.980691   \n",
       "606  17:41232465:TGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAG...       -12.446247   \n",
       "903  17:41248754:ATTTTCATCAGTCACAATTACTGCTGTGTCAATA...       -11.353837   \n",
       "642  17:41242891:GTGGGATACATACTACTGAATGCAAAGGACACCA...       -11.322243   \n",
       "\n",
       "                      exons          exon_id          gene_id gene_name  \\\n",
       "939  17:41251791-41251894:-  ENSE00003477699  ENSG00000012048     BRCA1   \n",
       "396  17:41219624-41219712:-  ENSE00003492626  ENSG00000012048     BRCA1   \n",
       "606  17:41234420-41234592:-  ENSE00003527960  ENSG00000012048     BRCA1   \n",
       "903  17:41249260-41249306:-  ENSE00003587679  ENSG00000012048     BRCA1   \n",
       "642  17:41242960-41243049:-  ENSE00003547126  ENSG00000012048     BRCA1   \n",
       "\n",
       "       transcript_id  ref_acceptorIntron  ref_acceptor  ref_exon  ref_donor  \\\n",
       "939  ENST00000461221           -2.928739      1.003448 -3.000968   2.998045   \n",
       "396  ENST00000357654           -2.256955      4.854000 -4.449435   2.749682   \n",
       "606  ENST00000357654           -2.337047      0.198098 -3.064758   3.751723   \n",
       "903  ENST00000357654           -2.896458      0.525802 -3.411850   4.421351   \n",
       "642  ENST00000357654           -3.370246      4.042363 -3.705677   3.972384   \n",
       "\n",
       "     ref_donorIntron  alt_acceptorIntron  alt_acceptor  alt_exon  alt_donor  \\\n",
       "939         0.683019           -3.894215     -5.617308  -1.16161  -4.262197   \n",
       "396         0.218514           -2.898835     -6.655025  -1.16161  -3.431215   \n",
       "606         0.509226           -3.498240     -5.302096  -1.16161  -2.475021   \n",
       "903         0.797147           -3.806159     -6.966350  -1.16161  -3.872047   \n",
       "642         0.524251           -2.947957     -5.811146  -1.16161  -4.428016   \n",
       "\n",
       "     alt_donorIntron  pathogenicity  efficiency  \n",
       "939         0.008081            1.0   -7.652236  \n",
       "396        -0.198021            1.0   -6.342944  \n",
       "606        -0.310342            1.0   -5.989502  \n",
       "903         0.715352            1.0   -8.457780  \n",
       "642         0.385194            1.0   -9.122525  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictionsMax.sort_values(['delta_logit_psi']).head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the MMSplice `deltalogitPSI` of a variant is greater than 2 or smaller than -2, it is likely to have an effect on splicing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f6f83b7c2e8>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAATf0lEQVR4nO3df4xlZX3H8fe3rCCCZRB0xF3SxUhs1U0RJojV1tH1B6BxaatmDZFFaTa2YqVso2ttxOg/mJYiJK1mC9SlIQKilo1gleJOTJtAFVxZASkDrrJbBEF2df3ZSb/94z6rt8vcO/fH3DN3fN6v5GbOeZ7n3vO959z53DPnnnsmMhNJUh1+Y6kLkCQ1x9CXpIoY+pJUEUNfkipi6EtSRVYsdQHdHHvssbl69eqRL+fHP/4xRxxxxMiX069xrQusbVBda7vvvtbP5z+/uYLaLNl66+F5L9ttukTuuOOOxzLzmfN2ZubY3k455ZRswvbt2xtZTr/Gta5MaxtU19pe8YrWbYks2Xrr4Xkv2226RICvZYdc9fCOJFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVZKwvwyBpca3efFPHvk1r5ji3Sz/Arotfv9glqWHu6UtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0JekiiwY+hFxVUQ8GhHfbGt7RkTcEhH3l59Hl/aIiMsjYjYi7oqIk9vus6GMvz8iNozm6UiSuullT/+TwOkHtW0Gbs3ME4FbyzzAGcCJ5bYR+Di03iSAi4CXAKcCFx14o5AkNWfB0M/MrwA/OKh5HbC1TG8FzmprvzpbbgMmIuI44HXALZn5g8x8AriFJ7+RSJJGbND/nDWZmQ+X6e8Bk2V6JfBQ27jdpa1T+5NExEZafyUwOTnJzMzMgCX2bv/+/Y0sp1/jWhdY26C61XbS3r0A7Bhh7ZvWzHXsmzy8ez8wkvXay/Nertt0HA397xIzMyMiF6OY8nhbgC0AU1NTOT09vVgP3dHMzAxNLKdf41oXWNugutY2MQEw0tq7/TvETWvmuGRn90jYdfb0IldET8972W7TMTTo2TuPlMM2lJ+PlvY9wPFt41aVtk7tkqQGDRr624ADZ+BsAG5saz+nnMVzGrCvHAb6IvDaiDi6fID72tImSWrQgod3IuJTwDRwbETspnUWzsXA9RFxHvAd4C1l+M3AmcAs8BPg7QCZ+YOI+Ajw1TLuw5l58IfDkqQRWzD0M/OtHbrWzjM2gXd1eJyrgKv6qk6StKj8Rq4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkWGCv2I+IuIuDsivhkRn4qIp0bECRFxe0TMRsR1EXFoGXtYmZ8t/asX4wlIkno3cOhHxErgz4GpzHwRcAiwHvgocGlmPg94Ajiv3OU84InSfmkZJ0lq0LCHd1YAh0fECuBpwMPAq4AbSv9W4Kwyva7MU/rXRkQMuXxJUh9WDHrHzNwTEX8LfBf4KfAl4A5gb2bOlWG7gZVleiXwULnvXETsA44BHmt/3IjYCGwEmJycZGZmZtASe7Z///5GltOvca0LrG1Q3Wo7ae9eAHaMsPZNa+Y69k0e3r0fGMl67eV5L9dtOo4GDv2IOJrW3vsJwF7g08DpwxaUmVuALQBTU1M5PT097EMuaGZmhiaW069xrQusbVBda5uYABhp7eduvqlj36Y1c1yys3sk7Dp7epEroqfnvWy36Rga5vDOq4FvZ+b3M/N/gM8CLwMmyuEegFXAnjK9
BzgeoPQfBTw+xPIlSX0aJvS/C5wWEU8rx+bXAvcA24E3lTEbgBvL9LYyT+n/cmbmEMuXJPVp4NDPzNtpfSB7J7CzPNYW4H3AhRExS+uY/ZXlLlcCx5T2C4HNQ9QtSRrAwMf0ATLzIuCig5ofBE6dZ+zPgDcPszxJ0nD8Rq4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqMlToR8RERNwQEd+KiHsj4qUR8YyIuCUi7i8/jy5jIyIuj4jZiLgrIk5enKcgSerVsHv6lwH/mpm/DfwucC+wGbg1M08Ebi3zAGcAJ5bbRuDjQy5bktSngUM/Io4C/gC4EiAzf5GZe4F1wNYybCtwVpleB1ydLbcBExFx3MCVS5L6Nsye/gnA94F/ioivR8QVEXEEMJmZD5cx3wMmy/RK4KG2++8ubZKkhqwY8r4nA+/OzNsj4jJ+dSgHgMzMiMh+HjQiNtI6/MPk5CQzMzNDlNib/fv3N7Kcfo1rXWBtg+pW20l79wKwY4S1b1oz17Fv8vDu/cBI1msvz3u5btNxNEzo7wZ2Z+btZf4GWqH/SEQcl5kPl8M3j5b+PcDxbfdfVdr+n8zcAmwBmJqayunp6SFK7M3MzAxNLKdf41oXWNugutY2MQEw0trP3XxTx75Na+a4ZGf3SNh19vQiV0RPz3vZbtMxNPDhncz8HvBQRDy/NK0F7gG2ARtK2wbgxjK9DTinnMVzGrCv7TCQJKkBw+zpA7wbuCYiDgUeBN5O643k+og4D/gO8JYy9mbgTGAW+EkZK0lq0FChn5k7gKl5utbOMzaBdw2zPEnScPxGriRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRYYO/Yg4JCK+HhGfL/MnRMTtETEbEddFxKGl/bAyP1v6Vw+7bElSfxZjT/89wL1t8x8FLs3M5wFPAOeV9vOAJ0r7pWWcJKlBQ4V+RKwCXg9cUeYDeBVwQxmyFTirTK8r85T+tWW8JKkhK4a8/8eA9wJPL/PHAHszc67M7wZWlumVwEMAmTkXEfvK+MfaHzAiNgIbASYnJ5mZmRmyxIXt37+/keX0a1zrAmsbVLfaTtq7F4AdI6x905q5jn2Th3fvB0ayXnt53st1m46jgUM/It4APJqZd0TE9GIVlJlbgC0AU1NTOT29aA/d0czMDE0sp1/jWhdY26C61jYxATDS2s/dfFPHvk1r5rhkZ/dI2HX29CJXRE/Pe9lu0zE0zJ7+y4A3RsSZwFOB3wQuAyYiYkXZ218F7Cnj9wDHA7sjYgVwFPD4EMuXJPVp4GP6mfn+zFyVmauB9cCXM/NsYDvwpjJsA3Bjmd5W5in9X87MHHT5kqT+jeI8/fcBF0bELK1j9leW9iuBY0r7hcDmESxbktTFsB/kApCZM8BMmX4QOHWeMT8D3rwYy5MkDcZv5EpSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klSRRbmevqTmrO7yf26lhbinL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVJGBQz8ijo+I7RFxT0TcHRHvKe3PiIhbIuL+8vPo0h4RcXlEzEbEXRFx8mI9CUlSb4bZ058DNmXmC4DTgHdFxAuAzcCtmXkicGuZBzgDOLHcNgIf
H2LZkqQBDBz6mflwZt5Zpn8E3AusBNYBW8uwrcBZZXodcHW23AZMRMRxA1cuSepbZObwDxKxGvgK8CLgu5k5UdoDeCIzJyLi88DFmfnvpe9W4H2Z+bWDHmsjrb8EmJycPOXaa68dur6F7N+/nyOPPHLky+nXuNYF1jaobrWddMEFAOz42Me6PsbOPfsWvS6AycPhkZ92H7Nm5VGLvtxenvdy3aZL5ZWvfOUdmTk1X9/Q/zkrIo4EPgNckJk/bOV8S2ZmRPT1rpKZW4AtAFNTUzk9PT1siQuamZmhieX0a1zrAmsbVNfaJiYAFqz93BH956xNa+a4ZGf3SNh19vTiL7iH571st+kYGursnYh4Cq3AvyYzP1uaHzlw2Kb8fLS07wGOb7v7qtImSWrIMGfvBHAlcG9m/l1b1zZgQ5neANzY1n5OOYvnNGBfZj486PIlSf0b5vDOy4C3ATsjYkdp+yvgYuD6iDgP+A7wltJ3M3AmMAv8BHj7EMuWJA1g4NAvH8hGh+6184xP4F2DLk+SNDy/kStJFRn67B1J/Vs9zxk41z74OADrR3R2jgTu6UtSVQx9SaqIoS9JFTH0Jakihr4kVcSzd6QBzXcGzkI2rZkb2bVzpF64py9JFTH0Jakihr4kVcRj+qraIMflpeXMPX1JqoihL0kVMfQlqSKGviRVxA9yJfVsmA++d138+kWsRINyT1+SKmLoS1JFDH1JqoihL0kV8YNcLWvzfbDolSylztzTl6SKGPqSVBFDX5IqYuhLUkX8IFdLzssbS81xT1+SKmLoS1JFGj+8ExGnA5cBhwBXZObFTdeg+fVzmMVz4aXlqdHQj4hDgL8HXgPsBr4aEdsy854m65DUvE47Fdc++DgA67vsRHzy9CNGUlONmt7TPxWYzcwHASLiWmAdMJLQ73XPdVz3Wse1LqlpO/fsW5LfhWEvBz2Ol6KOzBzJA8+7sIg3Aadn5p+U+bcBL8nM89vGbAQ2ltnnA/c1UNqxwGMNLKdf41oXWNugrG0w1taf38rMZ87XMXanbGbmFmBLk8uMiK9l5lSTy+zFuNYF1jYoaxuMtS2eps/e2QMc3za/qrRJkhrQdOh/FTgxIk6IiEOB9cC2hmuQpGo1engnM+ci4nzgi7RO2bwqM+9usoYOGj2c1IdxrQusbVDWNhhrWySNfpArSVpafiNXkipi6EtSRaoI/Yh4c0TcHRH/GxFTbe2viYg7ImJn+fmqDvf/UETsiYgd5XbmqGsrfe+PiNmIuC8iXtfh/idExO1l3HXlA/JFVx77wPPfFRE7OozbVdbnjoj42ihqmWeZPW2fiDi9rMvZiNjcUG1/ExHfioi7IuJzETHRYVxj622h9RARh5XtPVteW6tHWU/bco+PiO0RcU/5nXjPPGOmI2Jf27b+YBO1lWV33UbRcnlZb3dFxMlN1daXzPy1vwG/Q+uLXjPAVFv7i4HnlOkXAXs63P9DwF82XNsLgG8AhwEnAA8Ah8xz/+uB9WX6E8CfNrA+LwE+2KFvF3Bsw9t3we1D68SBB4DnAoeWdfuCBmp7LbCiTH8U+OhSrrde1gPwZ8AnyvR64LqGtuNxwMll+unAf81T2zTw+SZfX71uI+BM4AtAAKcBty9FnQvdqtjTz8x7M/NJ3+zNzK9n5n+X2buBwyPisHGojdblKa7NzJ9n5reBWVqXsfiliAjgVcANpWkrcNYo6y3LfAvwqVEuZwR+eQmQzPwFcOASICOVmV/KzLkyexut76YspV7WwzparyVovbbWlu0+Upn5cGbeWaZ/BNwLrBz1chfROuDqbLkNmIiI45a6qINVEfo9+mPgzsz8eYf+88ufbFdFxNEN1LMSeKhtfjdP/gU4BtjbFirzjVlsvw88kpn3d+hP4EvlcNnGDmNGYaHt08v6HLV30NoTnE9T662X9fDLMeW1tY/Wa60x5ZDSi4Hb5+l+aUR8IyK+EBEvbLCshbbROLzGFjR2l2EYVET8G/Dsebo+
kJk3LnDfF9L60/u1HYZ8HPgIrY3+EVqHN97RRG1N6rHOt9J9L//lmbknIp4F3BIR38rMr4yyNobcPqOs7cB6i4gPAHPANR0eZiTrbTmKiCOBzwAXZOYPD+q+k9Z1ZfaXz27+BTixodJ+LbbRr03oZ+arB7lfRKwCPgeck5kPdHjsR9rG/yPw+QZq6+WSFY/T+hNyRdkjG+qyFgvVGRErgD8CTunyGHvKz0cj4nO0DicM/YvR6zrssn1GdgmQHtbbucAbgLVZDv7O8xgjWW/z6GU9HBizu2zzo2i91kYuIp5CK/CvyczPHtzf/iaQmTdHxD9ExLGZOfILnvWwjZbFZWaqPrxTzqS4Cdicmf/RZVz7cbk/BL456tpoXZ5ifTmT4gRaezP/2T6gBMh24E2laQMwyr8cXg18KzN3z9cZEUdExNMPTNP6y2nk66rH7bMklwCJ1j8Nei/wxsz8SYcxTa63XtbDNlqvJWi9tr7c6c1qMZXPDa4E7s3Mv+sw5tkHPl+IiFNpZdjI35B63EbbgHPKWTynAfsy8+FR19a3pf4kuYkbrSDYDfwceAT4Ymn/a+DHwI6227NK3xWUs2mAfwZ2AnfR2rDHjbq20vcBWmda3Aec0dZ+M7866+i5tN4MZoFPA4eNcD1+EnjnQW3PAW5uq+Ub5XY3rcMbTWzfebdPe21l/kxaZ4Q80GBts7SO8x54fX3i4NqaXm/zrQfgw7TemACeWl5Ls+W19dyG1tXLaR2iu6ttfZ0JvPPA6w44v6yjb9D6YPz3Gqpt3m10UG1B659EPVBej1NN1NbvzcswSFJFqj68I0m1MfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRf4PG0gHEV69HBYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.axvline(x=2, color='r')\n",
    "plt.axvline(x=-2, color='r')\n",
    "\n",
    "predictions['delta_logit_psi'].hist(bins=20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For huge VCF files, the output is written to a CSV file batch by batch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "4it [00:11,  2.46s/it]\n"
     ]
    }
   ],
   "source": [
    "dl = SplicingVCFDataloader(gtf, fasta, vcf)\n",
    "predict_save(model, dl, output_csv='pred.csv', pathogenicity=True, splicing_efficiency=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ID,exons,exon_id,gene_id,gene_name,transcript_id,delta_logit_psi,ref_acceptorIntron,ref_acceptor,ref_exon,ref_donor,ref_donorIntron,alt_acceptorIntron,alt_acceptor,alt_exon,alt_donor,alt_donorIntron,pathogenicity,efficiency\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41196311-41197819:-,ENSE00001814242,ENSG00000012048,BRCA1,ENST00000357654,0.0018479383059293569,-2.9240594,5.0689387,-4.191132,-3.1975443,0.10508263,-2.9240594,5.0689387,-4.1903567,-3.1975443,0.10508263,0.9086872558907046,-0.11951388100548177\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41196312-41197819:-,ENSE00001312675,ENSG00000012048,BRCA1,ENST00000354071,0.0018508901191378701,-2.9240594,5.0689387,-4.190762,-2.977995,0.10508263,-2.9240594,5.0689387,-4.189985,-2.977995,0.10508263,0.8983492946545182,-0.1195065643553132\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41196821-41197819:-,ENSE00001831829,ENSG00000012048,BRCA1,ENST00000468300,0.006489664576316575,-2.9240594,5.0689387,-3.8693066,-2.1827195,0.10508263,-2.9240594,5.0689387,-3.865532,-2.1827195,0.10508263,0.8472335991054711,-0.10800844861539596\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41197579-41197819:-,ENSE00002914501,ENSG00000012048,BRCA1,ENST00000586385,0.04488832669926228,-2.9240594,5.0689387,-3.507481,-3.032851,0.10508263,-2.9240594,5.0689387,-3.4788947,-3.032851,0.10508263,0.8946106370974525,-0.012829804897474664\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41197645-41197819:-,ENSE00001937547,ENSG00000012048,BRCA1,ENST00000493795,0.04854488531130818,-2.9240594,5.0689387,-3.6942868,-3.278372,0.10508263,-2.9240594,5.0689387,-3.6633377,-3.278372,0.10508263,0.9083581151285743,-0.003766304501151105\r\n",
      "17:41197805:ACATCTGCC:['A'],17:41197694-41197819:-,ENSE00003513816,ENSG00000012048,BRCA1,ENST00000461221,0.04037057658363263,-2.9240594,5.0689387,-3.9611068,-3.1458025,0.10508263,-2.9240594,5.0689387,-3.9354396,-3.1458025,0.10508263,0.9047552398416338,-0.02402793798048028\r\n",
      "17:41197809:CTGCCCAAT:['C'],17:41196311-41197819:-,ENSE00001814242,ENSG00000012048,BRCA1,ENST00000357654,-0.20899207470271014,-2.9240594,5.0689387,-4.191132,-3.1975443,0.10508263,-2.9240594,4.7664943,-4.180473,-3.1975443,0.10508263,0.9732792117061092,-0.23590875031572456\r\n",
      "17:41197809:CTGCCCAAT:['C'],17:41196312-41197819:-,ENSE00001312675,ENSG00000012048,BRCA1,ENST00000354071,-0.20898474983191248,-2.9240594,5.0689387,-4.190762,-2.977995,0.10508263,-2.9240594,4.7664943,-4.1800942,-2.977995,0.10508263,0.9700127570929378,-0.23589519729245675\r\n",
      "17:41197809:CTGCCCAAT:['C'],17:41196821-41197819:-,ENSE00001831829,ENSG00000012048,BRCA1,ENST00000468300,-0.2020871631641508,-2.9240594,5.0689387,-3.8693066,-2.1827195,0.10508263,-2.9240594,4.7664943,-3.8505564,-2.1827195,0.10508263,0.9531234913363504,-0.22313276704862176\r\n"
     ]
    }
   ],
   "source": [
    "! head pred.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exon dataloader"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The exon dataloader runs MMSplice on a specific set of variant-exon pairs provided by a CSV file that contains the columns ('chrom', 'start', 'end', 'strand', 'pos', 'ref', 'alt')."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "exons = '../tests/data/test_exons.csv'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`ExonDataset` can be used interchangeably with `SplicingVCFDataloader` in functions like `predict_save` and `predict_all_table`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1it [00:00,  5.54it/s]\n"
     ]
    }
   ],
   "source": [
    "from mmsplice.exon_dataloader import ExonDataset\n",
    "\n",
    "dl = ExonDataset(exons, fasta)\n",
    "exon_pred = predict_all_table(model, dl, pathogenicity=True, splicing_efficiency=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f6f83daa0f0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAOLUlEQVR4nO3df4xl9VnH8fdTtrUbprIgMK4LcUggGMK2q0ywpv4xA23F0nSpQdKGNLsRM5pI0sQ1upTEH8HGrQSJ0ca4CnH/qA4ES5awRYsrIzGR2t0WulCoEFzabnQ31QU7SDTTPv4xZ81kmJ17Z+49995neL+Sydzvueec7/PsZD977nfuuRuZiSSpnrcNuwBJ0voY4JJUlAEuSUUZ4JJUlAEuSUVtGuRkF154YU5MTAxsvtdff51zzz13YPO1zX5Gm/2Mtsr9HD169DuZedHy7QMN8ImJCY4cOTKw+ebm5piamhrYfG2zn9FmP6Otcj8R8cpK211CkaSiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiBnonpiQBTOw9NPA592xfYGrgs7bLK3BJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKsoAl6SiDHBJKqqr/5U+Io4D3wW+Byxk5mREXAA8AEwAx4FbMvN0O2VKkpZbyxX4dGbuyMzJZrwXOJyZVwCHm7EkaUB6WULZCRxoHh8Abuq9HElStyIzO+8U8a/AaSCBP83M/RHxamZuaZ4P4PSZ8bJjZ4AZgPHx8WtmZ2f7Wf+q5ufnGRsbG9h8bbOf0WY/3Tt24rVWzrua8c1w8QXnDXzefpienj66ZPXj/3W1Bg78dGaeiIiLgccj4oWlT2ZmRsSK/xJk5n5gP8Dk5GROTU2trfIezM3NMcj52mY/o81+urd776FWzruaPdsXuGUD/XygyyWUzDzRfD8FPAxcC5yMiK0AzfdTbRUpSXqzjgEeEedGxLvOPAY+CDwLPALsanbbBRxsq0hJ0pt1s4QyDjy8uMzNJuAvM/NvIuLLwIMRcRvwCnBLe2VKkpbrGOCZ+TLwnhW2/wdwfRtFSZI6805MSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSrKAJekogxwSSpq07ALkKRBmdh7aCjzHt93Yyvn9QpckooywCWpKANckorqOsAj4pyI+GpEPNqML4uIL0XESxHxQES8o70yJUnLreUK/JPA80vGnwHuzczLgdPAbf0sTJK0uq4CPCIuAW4E/rwZB3Ad8FCzywHgpjYKlCStLDKz804RDwG/B7wL+DVgN/BUc/VNRFwKPJaZV69w7AwwAzA+Pn7N7Oxs34rvZH5+nrGxsYHN1zb7GW32071jJ15r5byrGd8MJ98Y+LQAbN92Xk/HT09PH83MyeXbO74PPCI+DJzKzKMRMbXWiTNzP7AfYHJyMqem1nyKdZubm2OQ87XNfkab/XRv9xDej71n+wL3HBvOrS/Hb51q5bzddPM+4CMR8SHgncAPAn8IbImITZm5AFwCnGilQknSijqugWfmHZl5SWZOAB8D/j4zbwWeAG5udtsFHGytSknSm/TyPvDfAH41Il4Cfgi4rz8lSZK6saYFocycA+aaxy8D1/a/JElSN7wTU5KKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgDXJKKMsAlqSgD
XJKKMsAlqaiOAR4R74yIf46IZyLiuYj4nWb7ZRHxpYh4KSIeiIh3tF+uJOmMbq7A/we4LjPfA+wAboiI9wKfAe7NzMuB08Bt7ZUpSVquY4Dnovlm+PbmK4HrgIea7QeAm1qpUJK0osjMzjtFnAMcBS4HPgvcDTzVXH0TEZcCj2Xm1SscOwPMAIyPj18zOzvbv+o7mJ+fZ2xsbGDztc1+Rpv9dO/YiddaOe9qxjfDyTcGPi0A27ed19Px09PTRzNzcvn2Td0cnJnfA3ZExBbgYeDHup04M/cD+wEmJydzamqq20N7Njc3xyDna5v9jDb76d7uvYdaOe9q9mxf4J5jXUVe3x2/daqV867pXSiZ+SrwBPBTwJaIOPOncQlwos+1SZJW0c27UC5qrryJiM3AB4DnWQzym5vddgEH2ypSkvRm3bye2AocaNbB3wY8mJmPRsTXgdmI+F3gq8B9LdYpSVqmY4Bn5teAH19h+8vAtW0UJUnqzDsxJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJakoA1ySijLAJamojgEeEZdGxBMR8fWIeC4iPtlsvyAiHo+IF5vv57dfriTpjG6uwBeAPZl5FfBe4Fci4ipgL3A4M68ADjdjSdKAdAzwzPy3zPxK8/i7wPPANmAncKDZ7QBwU1tFSpLeLDKz+50jJoAngauBb2bmlmZ7AKfPjJcdMwPMAIyPj18zOzvbe9Vdmp+fZ2xsbGDztc1+Rpv9dO/YiddaOe9qxjfDyTcGPi0A27ed19Px09PTRzNzcvn2rgM8IsaAfwA+nZmfj4hXlwZ2RJzOzFXXwScnJ/PIkSNrLH395ubmmJqaGth8bbOf0WY/3ZvYe6iV865mz/YF7jm2aeDzAhzfd2NPx0fEigHe1btQIuLtwF8Dn8vMzzebT0bE1ub5rcCpniqUJK1JN+9CCeA+4PnM/IMlTz0C7Goe7wIO9r88SdLZdPN64n3AJ4BjEfF0s+1TwD7gwYi4DXgFuKWdEiVJK+kY4Jn5j0Cc5enr+1uOJKlb3okpSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUUZ4JJUlAEuSUV1DPCIuD8iTkXEs0u2XRARj0fEi83389stU5K0XDdX4H8B3LBs217gcGZeARxuxpKkAeoY4Jn5JPCfyzbvBA40jw8AN/W5LklSB5GZnXeKmAAezcyrm/GrmbmleRzA6TPjFY6dAWYAxsfHr5mdne1P5V2Yn59nbGxsYPO1zX5Gm/1079iJ11o572rGN8PJNwY+LQDbt53X0/HT09NHM3Ny+fZNPZ0VyMyMiLP+K5CZ+4H9AJOTkzk1NdXrlF2bm5tjkPO1zX5Gm/10b/feQ62cdzV7ti9wz7GeI29djt861cp51/sulJMRsRWg+X6qfyVJkrqx3gB/BNjVPN4FHOxPOZKkbnXzNsK/Av4JuDIivh0RtwH7gA9ExIvA+5uxJGmAOi4IZebHz/LU9X2uRZK0Bt6JKUlFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVJQBLklFGeCSVFTH/9RYUrsm9h4adgkr2rN9gd0jWpsWeQUuSUUZ4JJUlAEuSUW5Bq6RMqrrwd1wzViD5hW4JBVlgEtSUWWWUNbz0rofL2mP77uxp+MrGtQyhksOUm96ugKPiBsi4hsR8VJE7O1XUZKkztYd4BFxDvBZ4GeBq4CPR8RV
/SpMkrS6Xq7ArwVeysyXM/N/gVlgZ3/KkiR1Epm5vgMjbgZuyMxfbMafAH4yM29ftt8MMNMMrwS+sf5y1+xC4DsDnK9t9jPa7Ge0Ve7nRzPzouUbW/8lZmbuB/a3Pc9KIuJIZk4OY+422M9os5/RttH6gd6WUE4Aly4ZX9JskyQNQC8B/mXgioi4LCLeAXwMeKQ/ZUmSOln3EkpmLkTE7cDfAucA92fmc32rrD+GsnTTIvsZbfYz2jZaP+v/JaYkabi8lV6SijLAJamoDR3gEfHbEXEiIp5uvj407Jr6JSL2RERGxIXDrqUXEXFXRHyt+fl8MSJ+ZNg19SIi7o6IF5qeHo6ILcOuqRcR8fMR8VxEfD8iSr4FbyN/5MeGDvDGvZm5o/n6wrCL6YeIuBT4IPDNYdfSB3dn5rszcwfwKPCbwy6oR48DV2fmu4F/Ae4Ycj29ehb4OeDJYReyHhv9Iz/eCgG+Ed0L/DpQ/jfQmflfS4bnUrynzPxiZi40w6dYvD+irMx8PjMHefd0v23oj/x4KwT47c3L2fsj4vxhF9OriNgJnMjMZ4ZdS79ExKcj4lvArdS/Al/qF4DHhl3EW9w24FtLxt9utm0IZT4P/Gwi4u+AH17hqTuBPwHuYvGq7i7gHhb/Uo20Dj19isXlkzJW6yczD2bmncCdEXEHcDvwWwMtcI069dPscyewAHxukLWtRzf9aDSVD/DMfH83+0XEn7G4xjryztZTRGwHLgOeiQhYfHn+lYi4NjP/fYAlrkm3PyMWw+4LjHiAd+onInYDHwauzwI3Wqzh51PRhv7Ijw29hBIRW5cMP8riL2TKysxjmXlxZk5k5gSLLwd/YpTDu5OIuGLJcCfwwrBq6YeIuIHF3098JDP/e9j1aGN/5Ef5K/AOfj8idrC4hHIc+KXhlqMV7IuIK4HvA68Avzzkenr1x8APAI83r5KeysyyPUXER4E/Ai4CDkXE05n5M0Muq2tFPvJj3byVXpKK2tBLKJK0kRngklSUAS5JRRngklSUAS5JRRngklSUAS5JRf0ftv7tStDt9/8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Histogram of the `delta_logit_psi` column of `exon_pred`.\n",
    "# Drawn on an explicit Axes so the figure carries its own axis labels and\n",
    "# title and can be understood when the notebook is only skimmed.\n",
    "fig, ax = plt.subplots()\n",
    "exon_pred['delta_logit_psi'].hist(ax=ax)\n",
    "# The trailing semicolon suppresses the repr of the list returned by ax.set().\n",
    "ax.set(\n",
    "    xlabel='delta_logit_psi',\n",
    "    ylabel='Count',\n",
    "    title='Distribution of delta_logit_psi (exon_pred)',\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "1it [00:00,  5.74it/s]\n"
     ]
    }
   ],
   "source": [
    "# Build a fresh ExonDataset from `exons` and `fasta` and bind it to `dl`.\n",
    "dl = ExonDataset(exons, fasta)\n",
    "\n",
    "# Run `model` over `dl` and write the result to 'pred_exon.csv'.\n",
    "# NOTE(review): `pathogenicity` and `splicing_efficiency` presumably enable\n",
    "# the corresponding extra prediction columns - confirm against mmsplice docs.\n",
    "predict_save(\n",
    "    model,\n",
    "    dl,\n",
    "    output_csv='pred_exon.csv',\n",
    "    pathogenicity=True,\n",
    "    splicing_efficiency=True\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
